// SPDX-FileCopyrightText: 2009-2019 hmht
// SPDX-License-Identifier: LGPL-3.0-only
#include "8051_ass.h"

/*****************************************************************************\
 *              Architecture
 *
 * File Contents:
 * ## Section 1. Token parsers
 * ## Section 2: some weird datastructure
 * ## Section 3: token classifiers
 * ## Section 4: Generic instruction emitters
 * ## Section 5: Specific instruction parsing
 * ## Section 6: mnemonic token dispatcher
 * ## Section 7: rizin glue and mnemonic tokenization
 *
 * documentation date: 2019-10-04
 * documentation date: 2019-10-14
 *
 * 1. Token parsers
 *
 * I'm sure most of this is re-inventing the wheel, (poorly, too), this is
 * because I didn't take enough time to find a proper implementation.
 * If you know a r2lib function that does the job it should be used instead.
 *
 *
 * 2. Some weird datastructure
 *
 * Started out for matching strings whitespace-independently, and uses c99s
 * (struct literal){} notation and is zero-terminated.
 * I wrote this thing in the late hours of r2con2019 while jetlagged.
 *
 * Currently the last place it's used in is mnemonic matching, since I hacked
 * in a nr-of-arguments field into the table. Whitespace-independence is
 * currently a bug since it'll accept "n o p" as "nop"... also functions need
 * to be renamed.
 *
 * One pitfall is that the match is lazy (non-greedy?) in other words, "reti"
 * is matched by "ret", but not the other way around, so the most-specific match
 * must come first in the list.
 *
 *
 * 3. token classifiers
 *
 * right now mostly has functions to distinguish between argument types, and
 * parses their data. (Some argument types, such as relative addresses and bits,
 * require parsing to assess their validity.)
 *
 *
 * 4. Generic instruction emitters
 *
 * I started out just writing specific parser for each
 * mnemonic(-variant), and halfway through I started noticing a lot of
 * code duplication, so extracted some of it.
 *	Their basic operation is simple: dump whatever you're given into the out
 * parameter, and move the write pointer forward.
 *
 *
 * 5. Specific instruction parsing
 *
 * Of course, in the very beginning I started out with the idea to
 * completely generalize everything, but there were more edge cases than
 * my small brain could handle, so I scrapped that and started punching
 * out special parsers for each instruction variant mindlessly.
 *	The result of this approach is really glaring. Lots of duplication.
 * There's lots of easy deduplication opportunity, and now that it's finished I
 * have some ideas on how to do it better, but eh.
 *
 *
 * 6. mnemonic token dispatcher
 *
 * The weird datastructure returns! with macros! it's basically just a jump
 * table with one bit of validation.
 *
 *
 * 7. rizin glue and mnemonic tokenization
 *
 * Had one look at the gb glue code and copied the lot of it without really
 * understanding what I'm doing.
 *
 * also splits out the first word (asserted mnemonic) for the token dispatcher,
 * and splits up the arguments
 *
\*****************************************************************************/
#include <rz_util.h>
#include <string.h>

/******************************************************************************
 * ## Section 1. Generic Token parsers
 *               ------- -------------*/

/**
 * parses a "0x"/"0X"-prefixed hexadecimal literal of one to four digits.
 *
 * hexstr: NUL-terminated candidate, may be NULL
 * out:    receives the value on success, left untouched on failure
 *
 * returns true only when the complete string is the prefix followed by 1-4
 * hexadecimal digits. A bare "0x", a fifth digit, or any other character makes
 * it return false.
 */
static bool parse_hexadecimal(char const *hexstr, ut16 *out) {
	if (!hexstr || !out || hexstr[0] != '0' || (hexstr[1] != 'x' && hexstr[1] != 'X')) {
		return false;
	}
	char const *const digits = hexstr + 2;
	if (digits[0] == '\0') {
		// the prefix alone does not denote a number
		return false;
	}
	ut16 value = 0;
	for (int i = 0; digits[i] != '\0'; i += 1) {
		char const c = digits[i];
		ut16 nibble;
		if (i == 4) {
			// more digits than fit in 16 bits
			return false;
		}
		if ('0' <= c && c <= '9') {
			nibble = c - '0';
		} else if ('a' <= c && c <= 'f') {
			nibble = c - 'a' + 10;
		} else if ('A' <= c && c <= 'F') {
			nibble = c - 'A' + 10;
		} else {
			return false;
		}
		value = (ut16)((value << 4) | nibble);
	}
	*out = value;
	return true;
}

/**
 * splits up the given string into multiple chunks, separated by unquoted
 * commas, and copies chunk n (counting from 1) into dest with the leading and
 * trailing blanks (space, tab) stripped.
 *
 * multi: text to split, may be NULL
 * n:     1-based index of the wanted chunk
 * dest:  output buffer; at most strlen(multi) + 1 bytes are written, so the
 *        caller must provide that much room
 *
 * returns false if the chunk does not exist, is empty, or contains an
 * unterminated quote.
 *
 * only text before newlines, NUL, and unquoted semicolons is chunked.
 *
 * any text after a single-quote and before the next single-quote is considered
 * quoted: commas and semicolons in there do not separate. There is no escaping.
 */
static bool get_arg(char const *multi, int n, char *dest) {
	char *lastnonws = dest;
	bool anynonws = false;
	bool in_string = false;
	if (!multi || !dest) {
		return false;
	}

	// step 1: walk past the n-1 chunks in front of the wanted one
	n -= 1;
	while (n && *multi && *multi != '\n' && *multi != '\r') {
		if (*multi == '\'') {
			in_string = !in_string;
		} else if (!in_string) {
			if (*multi == ';') {
				return false;
			}
			if (*multi == ',') {
				n -= 1;
			}
		}
		multi += 1;
	}
	if (!*multi || *multi == '\n' || *multi == '\r' || in_string) {
		return false;
	}

	// step 2: drop leading blanks
	while (*multi == ' ' || *multi == '\t') {
		multi += 1;
	}

	// step 3: copy up to the next unquoted separator. Every character is
	// consumed exactly once so a quote toggles the state exactly once.
	while (*multi && *multi != '\n' && *multi != '\r') {
		if (*multi == '\'') {
			in_string = !in_string;
		} else if (!in_string && (*multi == ';' || *multi == ',')) {
			break;
		}
		if (*multi != ' ' && *multi != '\t') {
			lastnonws = dest;
			anynonws = true;
		}
		*dest = *multi;
		dest += 1;
		multi += 1;
	}

	if (in_string) {
		// the quote was never closed
		return false;
	}

	if (!anynonws) {
		*dest = '\0';
		return false;
	}
	// cut off the trailing blanks
	*(lastnonws + 1) = '\0';
	return true;
}

/**
 * tokenizes the argument list
 *
 * arg:       must be 3 char pointers wide; slot i receives a heap copy of
 *            argument i+1 which the caller has to free()
 * arguments: text following the mnemonic
 *
 * returns the number of arguments found (0-3). When a fourth argument exists
 * it returns 4 and every slot is freed and reset to NULL. When memory runs
 * out every slot is freed and reset to NULL and 0 is returned.
 *
 * TODO: merge with get_arg, as this is now the only user
 */
static int get_arguments(char **arg, char const *arguments) {
	enum { MAX_ARGS = 3 };
	if (!arg || !arguments) {
		return 0;
	}
	// no single argument can be longer than the whole list
	char *scratch = malloc(strlen(arguments) + 1);
	if (!scratch) {
		return 0;
	}

	int count = 0;
	bool failed = false;
	while (count < MAX_ARGS && get_arg(arguments, count + 1, scratch)) {
		size_t const size = strlen(scratch) + 1;
		char *copy = malloc(size);
		if (!copy) {
			failed = true;
			break;
		}
		memcpy(copy, scratch, size);
		arg[count] = copy;
		count += 1;
	}

	bool const too_many = !failed && count == MAX_ARGS && get_arg(arguments, MAX_ARGS + 1, scratch);
	free(scratch);

	if (failed || too_many) {
		for (int i = 0; i < count; i += 1) {
			free(arg[i]);
			arg[i] = 0;
		}
		return too_many ? MAX_ARGS + 1 : 0;
	}
	return count;
}

/**
 * tells whether c ends the assembly part of a line: NUL, a line break
 * ('\n' or '\r'), or the ';' that starts a comment.
 */
static bool terminates_asm_line(char c) {
	switch (c) {
	case '\0':
	case '\n':
	case '\r':
	case ';':
		return true;
	default:
		return false;
	}
}

/**
 * Like rz_str_casecmp, but ignores all isspace characters, wherever they
 * appear in either string (leading, embedded or trailing).
 *
 * returns 0 when both strings are equal under that comparison, non-zero
 * otherwise. Two NULL pointers compare equal; NULL never equals a string.
 */
static int str_iwhitecasecmp(char const *a, char const *b) {
	if (!a || !b) {
		return (a ? 1 : 0) - (b ? 1 : 0);
	}
	for (;;) {
		// ctype functions need values in the unsigned char range
		while (isspace((unsigned char)*a)) {
			a += 1;
		}
		while (isspace((unsigned char)*b)) {
			b += 1;
		}
		int const ca = tolower((unsigned char)*a);
		int const cb = tolower((unsigned char)*b);
		if (ca != cb || ca == '\0') {
			return ca - cb;
		}
		a += 1;
		b += 1;
	}
}

/******************************************************************************
 * ## Section 2: some weird datastructure
                 ------------------------*/

/**
 * signature shared by the mnem_* parsers below: they receive the tokenized
 * arguments, a 16-bit value (the parsers call it pc) and a pointer to the
 * output write pointer, and report success.
 */
typedef bool (*parse_mnem_args)(char const *const *, ut16, ut8 **);

/**
 * lookup table type used by match_prefix_f. The last row must have a NULL
 * pattern; that row's res and args are what a failed lookup yields.
 */
typedef struct {
	char const *const pattern; // text handed to pattern_match
	parse_mnem_args res; // parser returned when the pattern matches
	int args; // argument count reported alongside res
} ftable[];

/**
 * checks whether str starts with pattern, ignoring case and ignoring
 * whitespace in both strings.
 *
 * Whitespace in the pattern is skipped without requiring any in str, and
 * whitespace in str is skipped in front of every compared character. Only a
 * prefix is compared: text in str after the pattern is exhausted is ignored.
 *
 * a NULL pattern matches everything; a NULL str matches no other pattern.
 */
static bool pattern_match(char const *str, char const *pattern) {
	if (!pattern) {
		return true;
	}
	if (!str) {
		return false;
	}

	int si = 0;
	for (int ti = 0; pattern[ti] != '\0'; ti += 1) {
		// ctype functions need values in the unsigned char range
		unsigned char const want = (unsigned char)pattern[ti];
		if (isspace(want)) {
			continue;
		}
		while (isspace((unsigned char)str[si])) {
			si += 1;
		}
		// the terminator of str never equals a pattern character, so
		// this also stops the scan at the end of str
		if (tolower(want) != tolower((unsigned char)str[si])) {
			return false;
		}
		si += 1;
	}
	return true;
}

/**
 * looks up the first row of tbl whose pattern matches str (see pattern_match).
 *
 * args: receives the args field of the selected row
 *
 * returns the res field of the selected row. When no pattern matches, the
 * terminating row (NULL pattern) is the selected one.
 */
static parse_mnem_args match_prefix_f(int *args, char const *str, ftable const tbl) {
	int idx;
	for (idx = 0; tbl[idx].pattern; idx += 1) {
		if (pattern_match(str, tbl[idx].pattern)) {
			break;
		}
	}
	// idx is either the first hit or the terminator row
	*args = tbl[idx].args;
	return tbl[idx].res;
}

/******************************************************************************
 * ## Section 3: token classifiers
                 -----------------*/

/**
 * recognizes an indirectly-addressed r0 or r1, written either in 8051 style
 * ("@r0") or with brackets ("[r0]"). The 'r' may be upper or lower case.
 */
static bool is_indirect_reg(char const *str) {
	if (!str) {
		return false;
	}

	switch (str[0]) {
	case '@':
		// exactly three characters: '@', 'r', digit
		if (rz_str_ansi_nlen(str, 4) != 3) {
			return false;
		}
		return tolower(str[1]) == 'r' && (str[2] == '0' || str[2] == '1');
	case '[':
		// exactly four characters: '[', 'r', digit, ']'
		if (rz_str_ansi_nlen(str, 5) != 4) {
			return false;
		}
		return tolower(str[1]) == 'r' && (str[2] == '0' || str[2] == '1') && str[3] == ']';
	default:
		return false;
	}
}

/**
 * recognizes the register names r0 through r7 ('r' in either case); the
 * string must be exactly two characters long.
 */
static bool is_reg(char const *str) {
	if (!str || tolower(str[0]) != 'r') {
		return false;
	}
	if (rz_str_ansi_nlen(str, 3) != 2) {
		return false;
	}
	return '0' <= str[1] && str[1] <= '7';
}

/**
 * computes the 8-bit signed displacement from pc + 2 to address.
 *
 * out: receives the displacement (two's complement) when it fits; it is not
 *      written otherwise
 *
 * returns true if the displacement lies within INT8_MIN..INT8_MAX.
 */
static bool relative_address(ut16 pc, ut16 address, ut8 *out) {
	st16 const delta = address - (pc + 2);
	bool const fits = INT8_MIN <= delta && delta <= INT8_MAX;
	if (fits) {
		*out = delta;
	}
	return fits;
}

/**
 * parses the value of an immediate operand. imm_str is handed to
 * parse_hexadecimal as-is, so a leading '#' must already be removed.
 */
static bool resolve_immediate(char const *imm_str, ut16 *imm_out) {
	// rz_asm resolves symbols, so does this really only need to parse hex?
	// maybe TODO: skip leading '#' if exists?
	bool const parsed = parse_hexadecimal(imm_str, imm_out);
	return parsed;
}

/**
 * parses a 16-bit address operand; accepts whatever parse_hexadecimal accepts.
 */
static bool to_address(char const *addr_str, ut16 *addr_out) {
	// rz_asm resolves symbols, so does this really only need to parse hex?
	// maybe TODO: check address bounds?
	bool const parsed = parse_hexadecimal(addr_str, addr_out);
	return parsed;
}

/**
 * parses addr_str as an address that must fit in 8 bits.
 *
 * addr_out: receives the address on success only
 *
 * returns false if the string is not a hexadecimal literal or exceeds 0xFF.
 */
static bool address_direct(char const *addr_str, ut8 *addr_out) {
	// rz_asm resolves symbols, so does this really only need to parse hex?
	// maybe TODO: check address bounds?
	ut16 wide;
	if (!parse_hexadecimal(addr_str, &wide)) {
		return false;
	}
	if (wide > 0xFF) {
		return false;
	}
	*addr_out = wide;
	return true;
}

/**
 * attempts to parse the given string as a bit-address of the form
 * "<byte address>.<bit>", split at the last '.'.
 *
 * The bit must be a single digit 0-7. The byte address must be a direct
 * address that is either in 0x20-0x2F or is a multiple of 8 at or above 0x80.
 *
 * addr_out: receives the resulting bit address on success only
 *
 * returns false for NULL input, malformed text, a byte address outside those
 * ranges, or when memory runs out.
 */
static bool address_bit(char const *addr_str, ut8 *addr_out) {
	// TODO: check if symbols are resolved properly in all cases:
	// - symbol.2
	// - 0x25.symbol
	// - symbol.symbol
	// - symbol
	if (!addr_str || !addr_out) {
		return false;
	}
	char const *separator = rz_str_lchr(addr_str, '.');
	if (!separator) {
		return false;
	}

	// the bit part can be inspected in place: exactly one digit 0-7
	char const *bitpart = separator + 1;
	if (bitpart[0] < '0' || '7' < bitpart[0] || bitpart[1] != '\0') {
		return false;
	}
	int const bit = bitpart[0] - '0';

	// the byte part needs its own NUL-terminated copy for address_direct
	size_t const bytelen = separator - addr_str;
	char *bytepart = malloc(bytelen + 1);
	if (!bytepart) {
		return false;
	}
	memcpy(bytepart, addr_str, bytelen);
	bytepart[bytelen] = '\0';
	ut8 byte;
	bool const byte_ok = address_direct(bytepart, &byte);
	free(bytepart);
	if (!byte_ok) {
		return false;
	}

	if (0x20 <= byte && byte < 0x30) {
		*addr_out = (byte - 0x20) * 8 + bit;
		return true;
	}
	if (0x80 <= byte && !(byte % 8)) {
		*addr_out = byte + bit;
		return true;
	}
	return false;
}

/**
 * extracts the register digit from a plain ("r3") or indirect ("@r1",
 * "[r1]") register name.
 *
 * returns 8 if reg is neither.
 */
static int register_number(char const *reg) {
	int number = 8; // not register 0-7, so...
	if (is_reg(reg)) {
		number = reg[1] - '0';
	} else if (is_indirect_reg(reg)) {
		number = reg[2] - '0';
	}
	return number;
}

/******************************************************************************
 * ## Section 4: Generic instruction emitters
                 ----------------------------*/

/**
 * writes the one-byte instruction instr at the write pointer and advances the
 * write pointer by one. Always succeeds.
 */
static bool single_byte_instr(ut8 const instr, ut8 **out) {
	ut8 *const dst = *out;
	dst[0] = instr;
	*out = dst + 1;
	return true;
}

/**
 * emits firstbyte followed by the bit address parsed from arg, and advances
 * the write pointer by two. Emits nothing if arg is not a valid bit address.
 */
static bool singlearg_bit(ut8 const firstbyte, char const *arg, ut8 **out) {
	ut8 bit_addr;
	bool const ok = address_bit(arg, &bit_addr);
	if (ok) {
		ut8 *const dst = *out;
		dst[0] = firstbyte;
		dst[1] = bit_addr;
		*out = dst + 2;
	}
	return ok;
}

/**
 * emits firstbyte followed by the displacement from pc + 2 to the address
 * parsed from arg, and advances the write pointer by two.
 *
 * returns false, without advancing, if arg is not an address or the
 * displacement does not fit (see relative_address).
 */
static bool singlearg_reladdr(ut8 const firstbyte, char const *arg, ut16 const pc, ut8 **out) {
	ut8 *const dst = *out;
	ut16 target;
	if (!to_address(arg, &target)) {
		return false;
	}
	// the displacement goes straight into the second byte
	if (!relative_address(pc, target, dst + 1)) {
		return false;
	}
	dst[0] = firstbyte;
	*out = dst + 2;
	return true;
}

/**
 * emits firstbyte followed by the 8-bit direct address parsed from arg, and
 * advances the write pointer by two. Emits nothing if arg does not parse.
 */
static bool singlearg_direct(ut8 const firstbyte, char const *arg, ut8 **out) {
	ut8 direct;
	bool const ok = address_direct(arg, &direct);
	if (ok) {
		ut8 *const dst = *out;
		dst[0] = firstbyte;
		dst[1] = direct;
		*out = dst + 2;
	}
	return ok;
}

/**
 * emits firstbyte followed by the low byte of the immediate in imm_str, and
 * advances the write pointer by two.
 *
 * imm_str must start with '#'; the rest is parsed by resolve_immediate.
 */
static bool singlearg_immediate(ut8 firstbyte, char const *imm_str, ut8 **out) {
	if (imm_str[0] != '#') {
		return false;
	}
	ut16 value;
	if (!resolve_immediate(imm_str + 1, &value)) {
		return false;
	}
	ut8 *const dst = *out;
	dst[0] = firstbyte;
	dst[1] = value & 0x00FF;
	*out = dst + 2;
	return true;
}

/**
 * emits the one-byte instruction formed by or-ing the register number of reg
 * into firstbyte.
 *
 * reg may be a plain register ("r0"-"r7") or an indirect one ("@r0", "[r1]").
 *
 * returns false, emitting nothing, if reg is not a register: register_number
 * signals that with 8, which must not leak into the opcode.
 */
static bool singlearg_register(ut8 firstbyte, char const *reg, ut8 **out) {
	int const number = register_number(reg);
	if (number < 0 || 7 < number) {
		return false;
	}
	return single_byte_instr(firstbyte | number, out);
}

/**
 * emits the one-byte instruction firstbyte, provided arg names the
 * accumulator ("a", compared with rz_str_casecmp).
 */
static bool single_a_arg_instr(ut8 const firstbyte, char const *arg, ut8 **out) {
	bool const is_accumulator = !rz_str_casecmp("a", arg);
	return is_accumulator ? single_byte_instr(firstbyte, out) : false;
}

/******************************************************************************
 * ## Section 5: Specific instruction parsing
                 ----------------------------*/

/**
 * acall addr: two bytes. Bits 8-10 of the address go into the top three bits
 * of the first byte (or-ed with 0x11), bits 0-7 form the second byte.
 * pc is not consulted.
 */
static bool mnem_acall(char const *const *arg, ut16 pc, ut8 **out) {
	ut16 target;
	if (!to_address(arg[0], &target)) {
		return false;
	}
	ut8 *const dst = *out;
	ut8 const high_bits = (target & 0x0700) >> 3;
	dst[0] = high_bits | 0x11;
	dst[1] = target & 0x00FF;
	*out = dst + 2;
	return true;
}

/**
 * add a, <src>: src may be an indirect register (0x26 | i), an immediate
 * (0x24), a register (0x28 | n) or a direct address (0x25).
 *
 * An operand that merely starts with '@' or '[' is no longer taken for an
 * indirect register: it has to pass is_indirect_reg, as in addc and subb.
 */
static bool mnem_add(char const *const *arg, ut16 pc, ut8 **out) {
	if (rz_str_casecmp(arg[0], "a")) {
		return false;
	}
	if (is_indirect_reg(arg[1])) {
		return singlearg_register(0x26, arg[1], out);
	}
	if (arg[1][0] == '#') {
		return singlearg_immediate(0x24, arg[1], out);
	}
	if (is_reg(arg[1])) {
		return singlearg_register(0x28, arg[1], out);
	}
	return singlearg_direct(0x25, arg[1], out);
}

/**
 * addc a, <src>: src may be an indirect register (0x36 | i), an immediate
 * (0x34), a register (0x38 | n) or a direct address (0x35).
 */
static bool mnem_addc(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const src = arg[1];
	if (rz_str_casecmp(arg[0], "a") != 0) {
		return false;
	}
	if (is_indirect_reg(src)) {
		return singlearg_register(0x36, src, out);
	} else if (src[0] == '#') {
		return singlearg_immediate(0x34, src, out);
	} else if (is_reg(src)) {
		return singlearg_register(0x38, src, out);
	}
	return singlearg_direct(0x35, src, out);
}

/**
 * ajmp addr: two bytes. Bits 8-10 of the address go into the top three bits
 * of the first byte (or-ed with 0x01), bits 0-7 form the second byte.
 * pc is not consulted.
 */
static bool mnem_ajmp(char const *const *arg, ut16 pc, ut8 **out) {
	ut16 target;
	if (!to_address(arg[0], &target)) {
		return false;
	}
	ut8 *const dst = *out;
	ut8 const high_bits = (target & 0x0700) >> 3;
	dst[0] = high_bits | 0x01;
	dst[1] = target & 0x00FF;
	*out = dst + 2;
	return true;
}

/**
 * anl <dst>, <src>
 *
 * - anl c, bit (0x82) and anl c, /bit (0xb0)
 * - anl a, @Ri (0x56 | i), #imm (0x54), Rn (0x58 | n), direct (0x55)
 * - anl direct, a (0x52) and anl direct, #imm (0x53)
 *
 * The "a" and "c" operands are compared case-insensitively, like in the other
 * mnemonic parsers.
 */
static bool mnem_anl(char const *const *arg, ut16 pc, ut8 **out) {
	if (!rz_str_casecmp(arg[0], "c")) {
		if (arg[1][0] == '/') {
			return singlearg_bit(0xb0, arg[1] + 1, out);
		}
		return singlearg_bit(0x82, arg[1], out);
	}
	if (!rz_str_casecmp(arg[0], "a")) {
		if (is_indirect_reg(arg[1])) {
			return singlearg_register(0x56, arg[1], out);
		}
		if (arg[1][0] == '#') {
			return singlearg_immediate(0x54, arg[1], out);
		}
		if (is_reg(arg[1])) {
			return singlearg_register(0x58, arg[1], out);
		}
		return singlearg_direct(0x55, arg[1], out);
	}

	// what remains has a direct address as destination
	ut8 address;
	if (!address_direct(arg[0], &address)) {
		return false;
	}
	if (!rz_str_casecmp(arg[1], "a")) {
		return singlearg_direct(0x52, arg[0], out);
	}
	ut16 imm;
	if (arg[1][0] != '#' || !resolve_immediate(arg[1] + 1, &imm)) {
		return false;
	}
	(*out)[0] = 0x53;
	(*out)[1] = address;
	(*out)[2] = imm & 0x00FF;
	*out += 3;
	return true;
}

/**
 * cjne <lhs>, <rhs>, <target>: three bytes, the last one being the
 * displacement from pc + 3 to target.
 *
 * - cjne a, #imm (0xb4) and cjne a, direct (0xb5)
 * - cjne Rn, #imm (0xb8 | n)
 * - cjne @Ri, #imm (0xb6 | i)
 *
 * The immediate forms require the '#'. Nothing is written and the write
 * pointer stays put when any operand is invalid.
 */
static bool mnem_cjne(char const *const *arg, ut16 pc, ut8 **out) {
	ut16 target;
	ut8 rel;
	// relative_address measures from its pc + 2; this instruction is one longer
	if (!to_address(arg[2], &target) || !relative_address(pc + 1, target, &rel)) {
		return false;
	}

	bool const lhs_is_a = !rz_str_casecmp(arg[0], "a");
	ut8 opcode;
	ut8 operand;
	if (lhs_is_a && arg[1][0] != '#') {
		if (!address_direct(arg[1], &operand)) {
			return false;
		}
		opcode = 0xb5;
	} else {
		if (lhs_is_a) {
			opcode = 0xb4;
		} else if (is_reg(arg[0])) {
			opcode = 0xb8 | register_number(arg[0]);
		} else if (is_indirect_reg(arg[0])) {
			opcode = 0xb6 | register_number(arg[0]);
		} else {
			return false;
		}
		ut16 imm;
		if (arg[1][0] != '#' || !resolve_immediate(arg[1] + 1, &imm)) {
			return false;
		}
		operand = imm & 0x00FF;
	}

	(*out)[0] = opcode;
	(*out)[1] = operand;
	(*out)[2] = rel;
	*out += 3;
	return true;
}

/**
 * clr a (0xe4), clr c (0xc3) or clr bit (0xc2 + bit address)
 */
static bool mnem_clr(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const operand = arg[0];
	if (rz_str_casecmp("a", operand) == 0) {
		return single_byte_instr(0xe4, out);
	} else if (rz_str_casecmp("c", operand) == 0) {
		return single_byte_instr(0xc3, out);
	}
	return singlearg_bit(0xc2, operand, out);
}

/**
 * cpl a (0xf4), cpl c (0xb3) or cpl bit (0xb2 + bit address)
 */
static bool mnem_cpl(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const operand = arg[0];
	if (rz_str_casecmp("a", operand) == 0) {
		return single_byte_instr(0xf4, out);
	} else if (rz_str_casecmp("c", operand) == 0) {
		return single_byte_instr(0xb3, out);
	}
	return singlearg_bit(0xb2, operand, out);
}

/**
 * da a: single byte 0xd4; the operand has to be the accumulator
 */
static bool mnem_da(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const operand = arg[0];
	return single_a_arg_instr(0xd4, operand, out);
}

/**
 * dec @Ri (0x16 | i), dec Rn (0x18 | n), dec a (0x14) or dec direct (0x15)
 */
static bool mnem_dec(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const operand = arg[0];
	if (is_indirect_reg(operand)) {
		return singlearg_register(0x16, operand, out);
	} else if (is_reg(operand)) {
		return singlearg_register(0x18, operand, out);
	} else if (rz_str_casecmp("a", operand) == 0) {
		return single_byte_instr(0x14, out);
	}
	return singlearg_direct(0x15, operand, out);
}

/**
 * div ab: single byte 0x84; the operand has to read "ab" (any case)
 */
static bool mnem_div(char const *const *arg, ut16 pc, ut8 **out) {
	bool const operand_is_ab = rz_str_casecmp("ab", arg[0]) == 0;
	return operand_is_ab ? single_byte_instr(0x84, out) : false;
}

/**
 * djnz Rn, <target>     (0xd8 | n, rel)          two bytes
 * djnz direct, <target> (0xd5, direct, rel)      three bytes
 *
 * The displacement is measured from the end of the instruction, so it is
 * computed and range-checked separately for the two lengths.
 */
static bool mnem_djnz(char const *const *arg, ut16 pc, ut8 **out) {
	ut16 target;
	if (!to_address(arg[1], &target)) {
		return false;
	}

	ut8 rel;
	if (is_reg(arg[0])) {
		// relative_address measures from pc + 2, the end of this form
		if (!relative_address(pc, target, &rel)) {
			return false;
		}
		(*out)[0] = 0xd8 | register_number(arg[0]);
		(*out)[1] = rel;
		*out += 2;
		return true;
	}

	ut8 dec_address;
	if (!address_direct(arg[0], &dec_address)) {
		return false;
	}
	// one byte longer: measure from pc + 3
	if (!relative_address(pc + 1, target, &rel)) {
		return false;
	}
	(*out)[0] = 0xd5;
	(*out)[1] = dec_address;
	(*out)[2] = rel;
	*out += 3;
	return true;
}

/**
 * inc Rn (0x08 | n), inc @Ri (0x06 | i), inc a (0x04), inc dptr (0xa3) or
 * inc direct (0x05)
 */
static bool mnem_inc(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const operand = arg[0];
	if (is_reg(operand)) {
		return singlearg_register(0x08, operand, out);
	} else if (is_indirect_reg(operand)) {
		return singlearg_register(0x06, operand, out);
	} else if (rz_str_casecmp("a", operand) == 0) {
		return single_byte_instr(0x04, out);
	} else if (rz_str_casecmp("dptr", operand) == 0) {
		return single_byte_instr(0xa3, out);
	}
	return singlearg_direct(0x05, operand, out);
}

/**
 * jb bit, <target>: 0x20, bit address, displacement from pc + 3
 */
static bool mnem_jb(char const *const *arg, ut16 pc, ut8 **out) {
	ut8 bit_addr;
	ut16 target;
	ut8 *const dst = *out;
	if (!address_bit(arg[0], &bit_addr)) {
		return false;
	}
	if (!to_address(arg[1], &target)) {
		return false;
	}
	// the displacement is stored directly into the third byte
	if (!relative_address(pc + 1, target, dst + 2)) {
		return false;
	}
	dst[0] = 0x20;
	dst[1] = bit_addr;
	*out = dst + 3;
	return true;
}

/**
 * jbc bit, <target>: 0x10, bit address, displacement from pc + 3
 */
static bool mnem_jbc(char const *const *arg, ut16 pc, ut8 **out) {
	ut8 bit_addr;
	ut16 target;
	ut8 *const dst = *out;
	if (!address_bit(arg[0], &bit_addr)) {
		return false;
	}
	if (!to_address(arg[1], &target)) {
		return false;
	}
	// the displacement is stored directly into the third byte
	if (!relative_address(pc + 1, target, dst + 2)) {
		return false;
	}
	dst[0] = 0x10;
	dst[1] = bit_addr;
	*out = dst + 3;
	return true;
}

/**
 * jc <target>: 0x40 followed by the displacement from pc + 2
 */
static bool mnem_jc(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const target = arg[0];
	return singlearg_reladdr(0x40, target, pc, out);
}

/**
 * jnb bit, <target>: 0x30, bit address, displacement from pc + 3
 */
static bool mnem_jnb(char const *const *arg, ut16 pc, ut8 **out) {
	ut8 bit_addr;
	ut16 target;
	ut8 *const dst = *out;
	if (!address_bit(arg[0], &bit_addr)) {
		return false;
	}
	if (!to_address(arg[1], &target)) {
		return false;
	}
	// the displacement is stored directly into the third byte
	if (!relative_address(pc + 1, target, dst + 2)) {
		return false;
	}
	dst[0] = 0x30;
	dst[1] = bit_addr;
	*out = dst + 3;
	return true;
}

/**
 * jnc <target>: 0x50 followed by the displacement from pc + 2
 */
static bool mnem_jnc(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const target = arg[0];
	return singlearg_reladdr(0x50, target, pc, out);
}

/**
 * jnz <target>: 0x70 followed by the displacement from pc + 2
 */
static bool mnem_jnz(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const target = arg[0];
	return singlearg_reladdr(0x70, target, pc, out);
}

/**
 * jz <target>: 0x60 followed by the displacement from pc + 2
 */
static bool mnem_jz(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const target = arg[0];
	return singlearg_reladdr(0x60, target, pc, out);
}

/**
 * lcall addr: 0x12, high byte of the address, low byte of the address
 */
static bool mnem_lcall(char const *const *arg, ut16 pc, ut8 **out) {
	ut16 target;
	if (!to_address(arg[0], &target)) {
		return false;
	}
	ut8 *const dst = *out;
	dst[0] = 0x12;
	dst[1] = (target >> 8) & 0x00FF;
	dst[2] = target & 0x00FF;
	*out = dst + 3;
	return true;
}

/**
 * ljmp addr: 0x02, high byte of the address, low byte of the address
 */
static bool mnem_ljmp(char const *const *arg, ut16 pc, ut8 **out) {
	ut16 target;
	if (!to_address(arg[0], &target)) {
		return false;
	}
	ut8 *const dst = *out;
	dst[0] = 0x02;
	dst[1] = (target >> 8) & 0x00FF;
	dst[2] = target & 0x00FF;
	*out = dst + 3;
	return true;
}

/**
 * mov c, bit: 0xa2 followed by the bit address taken from the second
 * argument. The first argument is not inspected here.
 */
static bool mnem_mov_c(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const bit = arg[1];
	return singlearg_bit(0xa2, bit, out);
}

/**
 * mov <dst>, <src>. The forms are tried in this order:
 *
 * - mov dptr, #imm16          0x90, high, low
 * - mov @Ri, a / direct / #   0xf6 | i, 0xa6 | i, 0x76 | i
 * - mov a, @Ri / Rn / # / direct   0xe6 | i, 0xe8 | n, 0x74, 0xe5
 * - mov Rn, a / # / direct    0xf8 | n, 0x78 | n, 0xa8 | n
 * - mov bit, c                0x92
 * - mov direct, a / Rn / @Ri  0xf5, 0x88 | n, 0x86 | i
 * - mov direct, #imm          0x75, direct, imm
 * - mov direct, direct        0x85, source, destination
 *
 * The dptr form requires the '#' in front of the immediate.
 */
static bool mnem_mov(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const dst = arg[0];
	char const *const src = arg[1];

	if (!rz_str_casecmp(dst, "dptr")) {
		ut16 imm;
		if (src[0] != '#' || !resolve_immediate(src + 1, &imm)) {
			return false;
		}
		(*out)[0] = 0x90;
		(*out)[1] = imm >> 8;
		(*out)[2] = imm & 0x00FF;
		*out += 3;
		return true;
	}
	if (is_indirect_reg(dst)) {
		if (!rz_str_casecmp(src, "a")) {
			return singlearg_register(0xf6, dst, out);
		}
		if (src[0] != '#') {
			return singlearg_direct(0xa6 | register_number(dst), src, out);
		}
		return singlearg_immediate(0x76 | register_number(dst), src, out);
	}
	if (!rz_str_casecmp(dst, "a")) {
		if (is_indirect_reg(src)) {
			return singlearg_register(0xe6, src, out);
		}
		if (is_reg(src)) {
			return singlearg_register(0xe8, src, out);
		}
		if (src[0] == '#') {
			return singlearg_immediate(0x74, src, out);
		}
		return singlearg_direct(0xe5, src, out);
	}
	if (is_reg(dst)) {
		if (!rz_str_casecmp(src, "a")) {
			return singlearg_register(0xf8, dst, out);
		}
		if (src[0] == '#') {
			return singlearg_immediate(0x78 | register_number(dst), src, out);
		}
		return singlearg_direct(0xa8 | register_number(dst), src, out);
	}

	// from here on the destination is a bit or a direct address
	if (!rz_str_casecmp(src, "c")) {
		return singlearg_bit(0x92, dst, out);
	}
	if (!rz_str_casecmp(src, "a")) {
		return singlearg_direct(0xf5, dst, out);
	}
	if (is_reg(src)) {
		return singlearg_direct(0x88 | register_number(src), dst, out);
	}
	if (is_indirect_reg(src)) {
		return singlearg_direct(0x86 | register_number(src), dst, out);
	}

	ut8 dest_addr;
	if (!address_direct(dst, &dest_addr)) {
		return false;
	}
	if (src[0] == '#') {
		ut16 imm;
		if (!resolve_immediate(src + 1, &imm)) {
			return false;
		}
		(*out)[0] = 0x75;
		(*out)[1] = dest_addr;
		(*out)[2] = imm & 0x00FF;
		*out += 3;
		return true;
	}
	ut8 src_addr;
	if (!address_direct(src, &src_addr)) {
		return false;
	}
	// the source address is encoded before the destination
	(*out)[0] = 0x85;
	(*out)[1] = src_addr;
	(*out)[2] = dest_addr;
	*out += 3;
	return true;
}

/**
 * movc a, @a+dptr (0x93) or movc a, @a+pc (0x83); the bracketed spellings
 * "[a+dptr]" and "[a+pc]" are accepted as well.
 */
static bool mnem_movc(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const src = arg[1];
	if (rz_str_casecmp(arg[0], "a") != 0) {
		return false;
	}
	bool const via_dptr = !str_iwhitecasecmp(src, "@a+dptr") || !str_iwhitecasecmp(src, "[a+dptr]");
	if (via_dptr) {
		return single_byte_instr(0x93, out);
	}
	bool const via_pc = !str_iwhitecasecmp(src, "@a+pc") || !str_iwhitecasecmp(src, "[a+pc]");
	if (via_pc) {
		return single_byte_instr(0x83, out);
	}
	return false;
}

/**
 * movx a, @Ri (0xe2 | i), movx a, @dptr (0xe0),
 * movx @Ri, a (0xf2 | i), movx @dptr, a (0xf0).
 * "[dptr]" is accepted in place of "@dptr".
 */
static bool mnem_movx(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const dst = arg[0];
	char const *const src = arg[1];

	if (rz_str_casecmp(dst, "a") == 0) {
		if (is_indirect_reg(src)) {
			return singlearg_register(0xe2, src, out);
		}
		if (!str_iwhitecasecmp(src, "@dptr") || !str_iwhitecasecmp(src, "[dptr]")) {
			return single_byte_instr(0xe0, out);
		}
		// not a load: fall through and try the store forms
	}
	if (rz_str_casecmp(src, "a") != 0) {
		return false;
	}
	if (is_indirect_reg(dst)) {
		return singlearg_register(0xf2, dst, out);
	}
	if (!str_iwhitecasecmp(dst, "@dptr") || !str_iwhitecasecmp(dst, "[dptr]")) {
		return single_byte_instr(0xf0, out);
	}
	return false;
}

/**
 * mul ab: single byte 0xa4; the operand has to read "ab" (any case)
 */
static bool mnem_mul(char const *const *arg, ut16 pc, ut8 **out) {
	bool const operand_is_ab = rz_str_ncasecmp("ab", arg[0], 3) == 0;
	return operand_is_ab ? single_byte_instr(0xa4, out) : false;
}

/**
 * nop: single byte 0x00; neither arg nor pc is used
 */
static bool mnem_nop(char const *const *arg, ut16 pc, ut8 **out) {
	ut8 const opcode = 0x00;
	return single_byte_instr(opcode, out);
}

/**
 * orl <dst>, <src>
 *
 * - orl c, bit (0x72) and orl c, /bit (0xa0)
 * - orl a, @Ri (0x46 | i), #imm (0x44), Rn (0x48 | n), direct (0x45)
 * - orl direct, a (0x42) and orl direct, #imm (0x43)
 *
 * For a direct destination the source must be "a" or an immediate; any other
 * source is rejected, as in mnem_xrl.
 */
static bool mnem_orl(char const *const *arg, ut16 pc, ut8 **out) {
	if (!rz_str_casecmp(arg[0], "c")) {
		if (arg[1][0] == '/') {
			return singlearg_bit(0xa0, arg[1] + 1, out);
		}
		return singlearg_bit(0x72, arg[1], out);
	}
	if (!rz_str_casecmp(arg[0], "a")) {
		if (is_indirect_reg(arg[1])) {
			return singlearg_register(0x46, arg[1], out);
		}
		if (arg[1][0] == '#') {
			return singlearg_immediate(0x44, arg[1], out);
		}
		if (is_reg(arg[1])) {
			return singlearg_register(0x48, arg[1], out);
		}
		return singlearg_direct(0x45, arg[1], out);
	}

	if (arg[1][0] != '#') {
		if (rz_str_casecmp(arg[1], "a")) {
			return false;
		}
		return singlearg_direct(0x42, arg[0], out);
	}

	ut8 dest_addr;
	if (!address_direct(arg[0], &dest_addr)) {
		return false;
	}
	ut16 imm;
	if (!resolve_immediate(arg[1] + 1, &imm)) {
		return false;
	}
	(*out)[0] = 0x43;
	(*out)[1] = dest_addr;
	(*out)[2] = imm & 0x00FF;
	*out += 3;
	return true;
}

/**
 * pop direct: 0xd0 followed by the direct address
 */
static bool mnem_pop(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const operand = arg[0];
	return singlearg_direct(0xd0, operand, out);
}

/**
 * push direct: 0xc0 followed by the direct address
 */
static bool mnem_push(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const operand = arg[0];
	return singlearg_direct(0xc0, operand, out);
}

/**
 * ret: single byte 0x22; neither arg nor pc is used
 */
static bool mnem_ret(char const *const *arg, ut16 pc, ut8 **out) {
	ut8 const opcode = 0x22;
	return single_byte_instr(opcode, out);
}

/**
 * reti: single byte 0x32; neither arg nor pc is used
 */
static bool mnem_reti(char const *const *arg, ut16 pc, ut8 **out) {
	ut8 const opcode = 0x32;
	return single_byte_instr(opcode, out);
}

/**
 * rl a: single byte 0x23; the operand has to be the accumulator
 */
static bool mnem_rl(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const operand = arg[0];
	return single_a_arg_instr(0x23, operand, out);
}

/**
 * rlc a: single byte 0x33; the operand has to be the accumulator
 */
static bool mnem_rlc(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const operand = arg[0];
	return single_a_arg_instr(0x33, operand, out);
}

/**
 * rr a: single byte 0x03; the operand has to be the accumulator
 */
static bool mnem_rr(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const operand = arg[0];
	return single_a_arg_instr(0x03, operand, out);
}

/**
 * rrc a: single byte 0x13; the operand has to be the accumulator
 */
static bool mnem_rrc(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const operand = arg[0];
	return single_a_arg_instr(0x13, operand, out);
}

/**
 * setb c (0xd3) or setb bit (0xd2 + bit address)
 */
static bool mnem_setb(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const operand = arg[0];
	bool const is_carry = rz_str_casecmp("c", operand) == 0;
	return is_carry ? single_byte_instr(0xd3, out) : singlearg_bit(0xd2, operand, out);
}

/**
 * sjmp <target>: 0x80 followed by the displacement from pc + 2
 */
static bool mnem_sjmp(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const target = arg[0];
	return singlearg_reladdr(0x80, target, pc, out);
}

/**
 * jmp @a+dptr (0x73, "[a+dptr]" accepted too), or a generic jump to an
 * address for which the shortest usable encoding is selected:
 *
 * 1. sjmp, when the target is within reach of an 8-bit displacement
 * 2. ajmp, when the target shares the upper five address bits with the
 *    address following a two-byte instruction (ajmp only encodes the lower
 *    eleven bits)
 * 3. ljmp otherwise
 */
static bool mnem_jmp(char const *const *arg, ut16 pc, ut8 **out) {
	if (!str_iwhitecasecmp(arg[0], "@a+dptr") || !str_iwhitecasecmp(arg[0], "[a+dptr]")) {
		return single_byte_instr(0x73, out);
	}

	ut16 address;
	if (!to_address(arg[0], &address)) {
		return false;
	}

	// a failed attempt emits nothing, so the next form starts clean
	if (mnem_sjmp(arg, pc, out)) {
		return true;
	}
	ut16 const next_pc = pc + 2;
	if ((next_pc & 0xF800) == (address & 0xF800)) {
		return mnem_ajmp(arg, pc, out);
	}
	return mnem_ljmp(arg, pc, out);
}

/**
 * subb a, <src>: src may be an indirect register (0x96 | i), an immediate
 * (0x94), a register (0x98 | n) or a direct address (0x95).
 */
static bool mnem_subb(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const src = arg[1];
	if (rz_str_casecmp(arg[0], "a") != 0) {
		return false;
	}
	if (is_indirect_reg(src)) {
		return singlearg_register(0x96, src, out);
	} else if (src[0] == '#') {
		return singlearg_immediate(0x94, src, out);
	} else if (is_reg(src)) {
		return singlearg_register(0x98, src, out);
	}
	return singlearg_direct(0x95, src, out);
}

/**
 * swap a: single byte 0xc4; the operand has to be the accumulator
 */
static bool mnem_swap(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const operand = arg[0];
	return single_a_arg_instr(0xc4, operand, out);
}

/**
 * xrl <dst>, <src>
 *
 * - xrl a, @Ri (0x66 | i), #imm (0x64), Rn (0x68 | n), direct (0x65)
 * - xrl direct, a (0x62) and xrl direct, #imm (0x63)
 */
static bool mnem_xrl(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const dst = arg[0];
	char const *const src = arg[1];

	if (rz_str_casecmp(dst, "a") == 0) {
		if (is_indirect_reg(src)) {
			return singlearg_register(0x66, src, out);
		} else if (src[0] == '#') {
			return singlearg_immediate(0x64, src, out);
		} else if (is_reg(src)) {
			return singlearg_register(0x68, src, out);
		}
		return singlearg_direct(0x65, src, out);
	}

	if (src[0] != '#') {
		// the only non-immediate source for a direct destination is a
		bool const src_is_a = rz_str_casecmp(src, "a") == 0;
		return src_is_a ? singlearg_direct(0x62, dst, out) : false;
	}

	ut8 dest_addr;
	ut16 imm;
	if (!address_direct(dst, &dest_addr)) {
		return false;
	}
	if (!resolve_immediate(src + 1, &imm)) {
		return false;
	}
	ut8 *const bytes = *out;
	bytes[0] = 0x63;
	bytes[1] = dest_addr;
	bytes[2] = imm & 0x00FF;
	*out = bytes + 3;
	return true;
}

/**
 * xch a, @Ri (0xc6 | i), xch a, Rn (0xc8 | n) or xch a, direct (0xc5)
 */
static bool mnem_xch(char const *const *arg, ut16 pc, ut8 **out) {
	char const *const src = arg[1];
	if (rz_str_casecmp(arg[0], "a") != 0) {
		return false;
	}
	if (is_indirect_reg(src)) {
		return singlearg_register(0xc6, src, out);
	} else if (is_reg(src)) {
		return singlearg_register(0xc8, src, out);
	}
	return singlearg_direct(0xc5, src, out);
}

/**
 * xchd a, @Ri: single byte 0xd6 | i. No other operand combination exists.
 */
static bool mnem_xchd(char const *const *arg, ut16 pc, ut8 **out) {
	bool const dst_is_a = rz_str_casecmp(arg[0], "a") == 0;
	if (!dst_is_a || !is_indirect_reg(arg[1])) {
		return false;
	}
	return singlearg_register(0xd6, arg[1], out);
}

/******************************************************************************
 * ## Section 6: mnemonic token dispatcher
                 -------------------------*/

/**
 * maps the mnemonic at the start of user_asm to its parser.
 *
 * nargs: receives the number of arguments that parser expects
 *
 * returns the parser, or NULL (with *nargs set to 0) when no mnemonic matches.
 *
 * The table is searched top to bottom and the first hit wins. pattern_match
 * skips whitespace on both sides, so the trailing space in a pattern does not
 * force a word boundary: "mov " would also accept "movc". Hence a mnemonic
 * that extends another one has to be listed before it (addc before add, jbc
 * before jb, reti before ret, ...).
 */
static parse_mnem_args mnemonic(char const *user_asm, int *nargs) {
#define mnem(n, mn)      { #mn " ", &mnem_##mn, n },
#define zeroarg_mnem(mn) { #mn, &mnem_##mn, 0 },
	return match_prefix_f(nargs, user_asm, (ftable){
		mnem(1, acall)
		mnem(2, addc)
		mnem(2, add)
		mnem(1, ajmp)
		mnem(2, anl)
		mnem(3, cjne)
		mnem(1, clr)
		mnem(1, cpl)
		mnem(1, da)
		mnem(1, dec)
		mnem(1, div)
		mnem(2, djnz)
		mnem(1, inc)
		mnem(2, jbc)
		mnem(2, jb)
		mnem(1, jc)
		mnem(1, jmp)
		mnem(2, jnb)
		mnem(1, jnc)
		mnem(1, jz)
		mnem(1, jnz)
		mnem(1, lcall)
		mnem(1, ljmp)
		/* so uh, the whitespace-independent matching sees movc and
		 * mov c as the same thing...
		 * My first thought was to add an exception for mov c, but
		 * later I saw that it'd be better to match the space after
		 * each instruction, but the exception is still here
		 */
		{ "mov c,", &mnem_mov_c, 2 },
		mnem(2, movc)
		mnem(2, movx)
		mnem(2, mov)
		mnem(1, mul)
		mnem(2, orl)
		mnem(1, pop)
		mnem(1, push)
		mnem(2, xchd)
		mnem(2, xch)
		mnem(2, xrl)
		mnem(1, rlc)
		mnem(1, rl)
		mnem(1, rrc)
		mnem(1, rr)
		mnem(1, setb)
		mnem(1, sjmp)
		mnem(2, subb)
		mnem(1, swap)
		zeroarg_mnem(nop)
		zeroarg_mnem(reti)
		zeroarg_mnem(ret)
		{ 0 } });
#undef zeroarg_mnem
#undef mnem
}

/******************************************************************************
 * ## Section 7: rizin glue and mnemonic tokenization
                 --------------------------------------*/

/**
 * assembles one line of 8051 assembly.
 *
 * a:        assembler state; a->pc is handed to the mnemonic parser
 * op:       receives the source text in buf_asm and, on success, the encoded
 *           bytes in buf
 * user_asm: the line to assemble
 *
 * returns the number of bytes emitted, or 0 if any parameter is NULL, the
 * mnemonic is unknown, the number of arguments does not fit the mnemonic, or
 * the mnemonic parser rejects the arguments.
 */
int assemble_8051(RzAsm *a, RzAsmOp *op, char const *user_asm) {
	if (!a || !op || !user_asm) {
		return 0;
	}
	rz_strbuf_set(&op->buf_asm, user_asm);

	// the mnemonic starts after the leading blanks
	while (!terminates_asm_line(*user_asm) && (*user_asm == ' ' || *user_asm == '\t')) {
		user_asm += 1;
	}
	// the argument list starts after the run of letters and the blanks
	// that follow it
	char const *arguments = user_asm;
	while (!terminates_asm_line(*arguments) && (('a' <= *arguments && *arguments <= 'z') || ('A' <= *arguments && *arguments <= 'Z'))) {
		arguments += 1;
	}
	while (!terminates_asm_line(*arguments) && (*arguments == ' ' || *arguments == '\t')) {
		arguments += 1;
	}

	char *arg[3] = { 0 };
	int const given = get_arguments(arg, arguments);
	/* const-qualified aliases of arg: the parsers take char const *s, while
	   free() needs the non-const pointers */
	char const *carg[3] = { arg[0], arg[1], arg[2] };

	int wanted;
	parse_mnem_args handler = mnemonic(user_asm, &wanted);

	int emitted = 0;
	ut8 instr[4] = { 0 };
	ut8 *binp = instr;
	if (handler && given == wanted && handler(carg, a->pc, &binp)) {
		size_t len = binp - instr;
		rz_strbuf_setbin(&op->buf, instr, len);
		emitted = binp - instr;
	}

	// single exit: the argument copies are released on every path
	for (int i = 0; i < 3; i += 1) {
		free(arg[i]);
		arg[i] = 0;
		carg[i] = 0;
	}
	return emitted;
}
